# Import the requests, re and json modules
import requests
import re
import json


# Browser-like User-Agent header sent with the download request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/76.0.3809.100 Safari/537.36"
}

# JSONP envelope of the form ``callback(payload)``, e.g. ``wenku_3(...)``.
# Written as a raw string so ``\w`` and ``\(`` reach the regex engine instead
# of being treated as (invalid) string escapes.
JSONP_PATTERN = re.compile(r'(\w+)\((.*)\)')

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def split_jsonp(text):
    """Split a JSONP response into its callback name and raw JSON payload.

    Args:
        text: Response body expected to contain ``callback(payload)``.

    Returns:
        A ``(callback_name, payload_text)`` tuple for the first match, or
        ``None`` when *text* contains no JSONP call.
    """
    match = JSONP_PATTERN.search(text)
    if match is None:
        return None
    return match.group(1), match.group(2)


def render_body(body_list):
    """Concatenate the ``'c'`` value of every body item and restore layout.

    Each value is passed through ``str()`` because some items (tables) carry
    a dict rather than a plain string. In the joined text a run of four
    spaces is turned into a newline, and afterwards a run of three spaces is
    turned into a newline followed by a tab.

    Args:
        body_list: Iterable of dicts, each having a ``'c'`` key.

    Returns:
        The formatted text as a single string.
    """
    joined = "".join(str(item['c']) for item in body_list)
    # Order matters: the four-space rule must run before the three-space one.
    return joined.replace(' ' * 4, '\n').replace(' ' * 3, '\n\t')


def main():
    """Prompt for a URL, download the JSONP document and save its text.

    The output file is named ``文档_<callback>.txt`` and written as UTF-8 in
    the current working directory.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the JSONP payload is not valid JSON.
        KeyError: If the decoded payload has no ``'body'`` entry, or a body
            item has no ``'c'`` entry.
    """
    # URL of the JSONP resource to fetch, typed in by the user.
    url = input('输入打印得网址：')

    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not try to parse an HTTP error page as a document.
    response.raise_for_status()
    # Decode as plain UTF-8 and leave the ``\uXXXX`` escapes to json.loads.
    # Decoding the body with "unicode_escape" first would also unescape
    # ``\"`` and ``\n`` inside JSON strings and make the payload unparsable.
    response.encoding = "utf-8"

    parts = split_jsonp(response.text)
    if parts is None:
        print('未获取到内容')
        return

    file_name, payload = parts
    print(payload)
    res_dict = json.loads(payload)
    print(file_name)

    body_list = res_dict['body']
    for index, body in enumerate(body_list):
        print(index, body['c'])

    with open('文档_{}.txt'.format(file_name), 'w', encoding='utf-8') as f:
        f.write(render_body(body_list))


if __name__ == "__main__":
    main()